Import the libraries

library(tidyverse)
library(rvest)
library(httr)
library(maps)
library(readxl)
library(plotly)
library(choroplethr)
library(choroplethrMaps)
drug_overdose = read_csv("./data/VSRR_Provisional_Drug_Overdose_Death_Counts.csv") %>% 
  janitor::clean_names()
state_level = c(state.name[1:8], "District of Columbia", state.name[9:32],"New York City", state.name[33:50])
drug_overdose_52 = 
  drug_overdose %>% 
  filter(!(state_name %in% c("United States"))) %>% 
  relocate(state_name) %>% 
  mutate(month = factor(month, levels = month.name), # change month and year to factor
         year = factor(year),
         state_name = factor(state_name, levels = state_level)) %>% 
  arrange(state_name) %>% 
  group_by(state_name, year) %>% 
  mutate(month = sort(month))

It’s hard to find drug-specific data for those state missing specific drug types. Opioids data is much easier to find. We should probably focus on opioids.
We drew a map of the opioids to justify our choice.

opioids_df = 
  drug_overdose_52 %>%  
  ungroup() %>% 
  select(state_name, year, month, indicator, data_value) %>% 
  filter(!(state_name %in% c("Alabama", "Arkansas", "Florida", "Idaho", "Louisiana", "Minnesota", "Nebraska", "North Dakota", "Pennsylvania")),
         str_detect(indicator, "T4")) %>% 
  mutate(opioids_yn = ifelse(str_detect(indicator, "opioids"), TRUE, FALSE)) %>% 
  group_by(year, month, opioids_yn) %>% 
  summarize(opioids_rate = mean(data_value, na.rm = TRUE))
## `summarise()` has grouped output by 'year', 'month'. You can override using the `.groups` argument.
opioids_df %>% 
  ungroup() %>% 
  ggplot(aes(x = month, y = opioids_rate, group = opioids_yn, color = opioids_yn))+
  geom_point()+
  geom_line()+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 1))+
  facet_grid(.~year)


If we decided to focus on opioids, this will make my life much easier.

FL part: Overall review:

fl_death = 
  drug_overdose_52 %>% 
  filter(state_name %in% "Florida",
         indicator %in%  c("Number of Deaths", "Number of Drug Overdose Deaths")) %>% 
  select(year, month, indicator, deaths = data_value) %>% 
  pivot_wider(
    names_from = indicator,
    values_from = deaths
  ) %>% 
  janitor::clean_names() %>% 
  group_by(year, month) %>% 
  mutate(
    percent_overdose_death = number_of_drug_overdose_deaths / number_of_deaths
  )
## Adding missing grouping variables: `state_name`
fl_death  %>% 
  ggplot(aes(x = month, y = percent_overdose_death, group = year, color = year))+
  geom_point()+
  geom_line()

fl_death %>% 
  ggplot(aes(x = month, y = percent_overdose_death, group = NA))+
  geom_point()+
  geom_line()+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 1))+
  facet_grid(.~year)

We can also focus just on opioids Data source: cdc wonder, request by queries, Drug overdose deaths are identified using underlying cause-of-death codes from the Tenth Revision of ICD (ICD–10): X40–X44 (unintentional), X60–X64 (suicide), X85 (homicide), and Y10–Y14 (undetermined). Opioids identified by selecting T40.0-40.4, 40.6

# read in data
fl_opi_death = read_csv("./data/fl_opi_death_15_19.csv") %>% 
  janitor::clean_names() %>% 
  select(state_name = state, year, month, deaths)

fl_opi_death_add = read_csv("./data/fl_opi_death_20_21.csv") %>% 
  janitor::clean_names() %>% 
  select(state_name = occurrence_state, year = year_code, month, deaths) 

fl_opi_death = bind_rows(fl_opi_death, fl_opi_death_add) %>% 
  filter(!is.na(month)) %>% 
  separate(month, into = c("month", "useless"), sep = "\\,") %>% 
  select(-useless) %>% 
  mutate(month = substr(month, 1,3),
  month = month.name[match(str_to_title(month), month.abb)],
  month = factor(month, levels = month.name),
  year = factor(year)) %>% 
  select(-state_name)

data could be suppressed as indicated by the cdc website, the data value also supported by florida government

But anyway let’s plot
First by year, this is the death count

fl_opi_death  %>% 
  ggplot(aes(x = month, y = deaths, group = year, color = year))+
  geom_point()+
  geom_line()

what about each month within a year

fl_opi_death %>% 
  ggplot(aes(x = month, y = deaths, group = NA))+
  geom_point()+
  geom_line()+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 1))+
  facet_grid(.~year)

It’s climbing, every single month. The 2020 data is less probably because it’s provisional and some data still under investigation

Let’s analyze the number of death vs median household income

fl_eco_df = 
  read_csv("./data/median_household_income_fl.csv") %>% 
  janitor::clean_names() %>% 
  select(year, household_income_by_race, household_income_by_race_moe, geography) %>% 
  filter(
    str_detect(geography, "FL|United States|Florida"),
    year >= "2015") %>% 
  mutate(year = factor(year))
## Rows: 49 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Geography, ID Geography, Slug Geography
## dbl (4): ID Year, Year, Household Income by Race, Household Income by Race Moe
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fl_eco_df %>% 
  mutate(text_label = str_c("Year: ", year, "\nMedian Household Income: $", household_income_by_race, 
                            "\nMargin of error: ± $", household_income_by_race_moe)) %>% 
  plot_ly(
    x = ~year, y = ~household_income_by_race, color = ~geography, text = ~text_label, 
    alpha = 0.5, type = "scatter", mode = "markers+lines", colors = "viridis", error_y = ~list(array = household_income_by_race_moe)) %>% 
  layout(
    title = "Median Household Income: FLorida vs. The U.S",
         xaxis = list(title = "Year"),
         yaxis = list(title = "Median Household Income"))

Income and drug overdose death percent , by year

#patchwork
income_drug_df = 
  fl_death %>% 
  ungroup() %>% 
  group_by(year) %>% 
  summarize(overdose_death_rate = sum(number_of_drug_overdose_deaths)/sum(number_of_deaths)) %>% 
  inner_join(., fl_eco_df %>% filter(geography %in% "Florida"))
## Joining, by = "year"
income_drug_df %>% 
  ggplot(aes(x = household_income_by_race, y = overdose_death_rate, group = NA, color = year))+
  geom_point()+
  geom_line()

maps maps maps

fl_county_df = 
  read_csv("./data/NCHS_-_Drug_Poisoning_Mortality_by_County__United_States.csv") %>% 
  janitor::clean_names() %>% 
  filter(state %in% "Florida",
         year >= 2015) %>% 
  select(year, county, population, death_rate = model_based_death_rate) %>% 
  separate(county, into = c("county", "useless"), sep = " County") %>% 
  select(-useless) %>% 
  mutate(year = factor(year),
         county = str_to_lower(county)) %>% 
  relocate(county)
## Rows: 50176 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): State, County, Urban/Rural Category
## dbl (9): FIPS, Year, FIPS State, Population, Model-based Death Rate, Standar...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fl_county_df
## # A tibble: 268 × 4
##    county  year  population death_rate
##    <chr>   <fct>      <dbl>      <dbl>
##  1 alachua 2015      259177       12.1
##  2 alachua 2016      264287       13.5
##  3 alachua 2017      266820       12.9
##  4 alachua 2018      269956       11.7
##  5 baker   2015       27351       21.9
##  6 baker   2016       27872       21.7
##  7 baker   2017       28230       24.2
##  8 baker   2018       28355       23.0
##  9 bay     2015      181387       17.6
## 10 bay     2016      183384       24.5
## # … with 258 more rows
data(county.fips)
abc = county.fips %>% 
  separate(polyname, into = c("state", "county"), sep = "\\,") %>% 
  filter(state %in% "florida") %>% 
  select(-state) %>% 
  as.tibble()
fl_county_df = left_join(fl_county_df,abc, by = "county") %>% 
  select(county, year, death_rate, fips) %>% 
  filter(year == 2015)
fips_add = c(12027, 12091, 12109, 12111)
a = 1
for (i in c(13,46,55,56)){
  fl_county_df[i,4] = fips_add[a]
  a = a+1
}
fl_county_df %>% 
  group_by(fips) %>% 
  mutate(fips = as.numeric(fips)) %>% 
  rename(region = fips,
         value = death_rate) %>% 
  county_choropleth(state_zoom = c("florida"),
                     legend = "death_rate")